# This file preprocesses the WVS and Arab Barometer datasets downloaded from public repositories.
# WV6_Data_spss_v_2015_04_18.sav is the WVS wave 6 (2010-2014) data file,
# downloaded from https://www.worldvaluessurvey.org/WVSDocumentationWV6.jsp

# The GDP per capita data gdp.csv is downloaded from https://data.worldbank.org/

# ab-combined.rds is a dataset that combines waves 1 to 4 of the Arab Barometer 
# publicly available at https://www.arabbarometer.org/survey-data/data-downloads/

library(tidyverse)
library(haven)

# WVS ---------------------------------------------------------------------

# Load the raw WVS wave 6 SPSS file.
wvs <- read_sav("data/WV6_Data_spss_v_2015_04_18.sav")

# Build the respondent-level working table: country label, survey year,
# weight, and four derived scores (econ_sec, pers_sec, trust_gov, pfd).
wvs_sub <- wvs %>%
  # Identifiers and weight: V2 becomes a factor of its value labels; the
  # labels are stripped from V258 and V262.
  mutate(
    cntry = as_factor(V2),
    wgt = zap_labels(V258),
    year = zap_labels(V262)
  ) %>%
  # pfd: mean of three 0/1 indicators. Each indicator is 1 when the first
  # item of the pair takes one of the listed codes, or when the first item
  # takes a specific other code and the second item takes a listed code.
  # A missing first item always yields NA.
  mutate(
    demo6061 = ifelse(
      V60 == 3 | ((V60 == 1 | V60 == 4) & V61 == 3), 1, 0
    ),
    demo6263 = ifelse(
      V62 == 2 | V62 == 4 | (V62 == 3 & (V63 == 2 | V63 == 4)), 1, 0
    ),
    demo6465 = ifelse(
      V64 == 2 | V64 == 3 | (V64 == 1 & (V65 == 2 | V65 == 3)), 1, 0
    ),
    pfd = (demo6061 + demo6263 + demo6465) / 3
  ) %>%
  # Reverse and rescale with (4 - x) / 3, which maps 1 -> 1 and 4 -> 0
  # (assumes the items are coded 1-4 -- TODO confirm against the codebook).
  # econ_sec first averages V181 and V182.
  mutate(
    econ_sec = (4 - (V181 + V182) / 2) / 3,
    pers_sec = (4 - V170) / 3,
    trust_gov = (4 - V115) / 3
  ) %>%
  select(cntry, year, wgt, econ_sec, pers_sec, trust_gov, pfd)

# Country labels that are flagged as "Arab" below.
arab_countries <- c(
  "Algeria", "Palestine", "Iraq", "Jordan", "Kuwait", "Lebanon",
  "Libya", "Morocco", "Qatar", "Egypt", "Yemen", "Tunisia"
)

wvs_sub <- wvs_sub %>%
  # Exclude Bahrain (rows with a missing country label are dropped too,
  # because the comparison evaluates to NA for them).
  filter(cntry != "Bahrain") %>%
  mutate(
    arab = if_else(cntry %in% arab_countries, "Arab", "non-Arab"),
    # GDP is available only for 2011 in Libya, so pin Libya's year to 2011.
    year = ifelse(cntry == "Libya", 2011, year)
  )

# Read the GDP per capita table and keep only the country name, country
# code and the 2010-2014 year columns. read_csv2() expects a
# semicolon-delimited file with "," as the decimal mark.
gdp <- read_csv2("data/gdp.csv") %>%
  select(
    cntry = `Country Name`,
    cntry_code = `Country Code`,
    `2010`:`2014`
  )

# Country-name replacements applied to the GDP table: names are the
# patterns to find, values are the replacements (presumably chosen to match
# the WVS country labels -- confirm against the survey data).
# Note: str_replace_all() interprets the names as regular expressions, so a
# "." in a pattern matches any character, and matches are replaced as
# substrings rather than whole values.
repl_pattern <- c(
  "West Bank and Gaza" = "Palestine",
  "Russian Federation" = "Russia",
  "Egypt, Arab Rep." = "Egypt",
  "Hong Kong SAR, China" = "Hong Kong",
  "Kyrgyz Republic" = "Kyrgyzstan",
  "Korea, Rep." = "South Korea",
  "Korea South" = "South Korea",
  "Yemen, Rep." = "Yemen",
  "United Kingdom" = "Great Britain",
  "Vietnam" = "Viet Nam",
  "Iran, Islamic Rep." = "Iran",
  "Bosnia and Herzegovina" = "Bosnia",
  "Czech Republic" = "Czech Rep.",
  "Dominican Republic" = "Dominican Rep.",
  "Slovak Republic" = "Slovakia",
  "Macedonia, FYR" = "Macedonia",
  "Venezuela, RB" = "Venezuela",
  "Germany West" = "West Germany",
  "Germany East" = "East Germany"
)

# Rewrite the country names in place using the mapping above.
gdp <- mutate(gdp, cntry = str_replace_all(cntry, repl_pattern))

# Reshape the GDP table from one column per year to one row per
# country-year (columns: cntry, cntry_code, year, gdpc). pivot_longer()
# replaces the superseded gather(); the year column names are converted to
# numeric so they can be matched against the survey year.
gdp <- gdp %>%
  pivot_longer(
    cols = -c(cntry, cntry_code),
    names_to = "year",
    values_to = "gdpc"
  ) %>%
  mutate(year = as.numeric(year))

# Attach GDP per capita (and the country code) to every respondent. The
# join keys are spelled out so that a stray shared column cannot silently
# change the match.
wvs_sub <- left_join(wvs_sub, gdp, by = c("cntry", "year"))

# Aggregate to one row per (cntry, cntry_code, arab, gdpc) combination,
# taking the survey-weighted mean of each score. Missing scores are dropped
# by na.rm = TRUE; a missing weight still yields NA.
# `.groups = "drop"` must be an argument of summarise() itself: when it is
# passed through summarise_at()'s `...` it is forwarded to the summary
# function and the result stays grouped.
wvs_aggr <- wvs_sub %>%
  group_by(cntry, cntry_code, arab, gdpc) %>%
  summarise(
    across(
      c(econ_sec, pers_sec, trust_gov, pfd),
      ~ weighted.mean(.x, wgt, na.rm = TRUE)
    ),
    .groups = "drop"
  )

write_csv(wvs_aggr, "wvs-aggr.csv")


# Arab barometer ----------------------------------------------------------

# Load the combined Arab Barometer data (waves 1 to 4).
ab <- read_rds("data/ab-combined.rds")

# Derive the demographic covariates and give the substantive items
# readable names.
ab <- ab %>%
  # Income quartile within each country-wave: q1015 is cut at its own
  # group-specific quantiles and the resulting bin index (1-4) is kept.
  group_by(country, wave) %>%
  mutate(income = as.numeric(cut(q1015,
                                 breaks = quantile(q1015, na.rm = TRUE),
                                 include.lowest = TRUE))) %>%
  ungroup() %>%
  mutate(income = factor(income, labels = paste(1:4, "quart")),
         sex = q1002,
         # Three age bands: [18, 29], (29, 49], (49, 100].
         age_gr = cut(q1001, c(18, 29, 49, 100), include.lowest = TRUE,
                      labels = c("18-29", "30-49", "50>")),
         # Education collapsed to low / middle / high from q1003.
         edu = fct_collapse(q1003,
                            low = c("Illiterate/No formal education", "Elementary", "Prepartory/Basic"),
                            middle = c("Secondary", "Mid-level diploma (professional or technical"),
                            high = c("ba", "MA and above")),
         # Tunisia uses its own education item (t1003).
         # NOTE(review): ifelse() drops factor attributes, so from here on
         # `edu` holds the integer level codes rather than the labels; the
         # codes from q1003, t1003 and y1003 only agree if the collapsed
         # levels come out in the same order in each -- confirm.
         edu = ifelse(country != "Tunisia", edu,
                      fct_collapse(t1003,
                                   low = c("Illiterate/No formal education", "Elementary", "Prepartory/Basic"),
                                   middle = c("Secondary"),
                                   high = c("ba", "MA and above"))),
         # Yemen in wave 3 uses its own education item (y1003).
         edu = ifelse(!(country == "Yemen" & wave == 3), edu,
                      fct_collapse(y1003,
                                   low = c("Illiterate/No formal education", "Elementary", "Prepartory/Basic"),
                                   middle = c("Pre-high school diploma", "Secondary", "Mid-level diploma/professional or technical)"),
                                   high = c("ba", "MA and above")))) %>%
  rename(econ_sec = q101,
         pers_sec = q105,
         demo_pref = q512,
         authorit_rule = q5183,
         relig_position = q6062,
         relig_govern = q6063,
         trust_governm = q2011,
         trust_courts = q2012,
         trust_parlm = q2013,
         trust_police = q2014,
         trust_army = q2016)

# Rescale the survey items and build the composite indices.
# across() replaces the superseded mutate_at().
ab <- ab %>%
  mutate(
    # q516* items: shift and scale with (x - 1) / 3.
    across(starts_with("q516"), ~ (as.numeric(.x) - 1) / 3),
    # Security, authoritarian-rule, religion and trust items: reverse and
    # scale with (4 - x) / 3, so code 1 becomes 1 and code 4 becomes 0.
    across(
      c(
        econ_sec, pers_sec, q105b,
        authorit_rule,
        relig_govern, relig_position,
        starts_with("trust_")
      ),
      ~ (4 - as.numeric(.x)) / 3
    )
  ) %>%
  mutate(
    # Wave 1 gets a constant weight of 1.
    wt = ifelse(wave == 1, 1, wt),
    # In wave 1, pers_sec (q105) is taken from q105b instead.
    pers_sec = ifelse(wave == 1, q105b, pers_sec),
    democr_approval = (q5161 + q5162 + q5163) / 3,
    islamism = (relig_govern + relig_position) / 2,
    # NOTE(review): demo_pref (q512) is not rescaled anywhere in this
    # script; the (x + 1) / 2 step maps onto [0, 1] only if demo_pref
    # already lies in [0, 1] -- confirm against the combined dataset.
    demo_auto = demo_pref - authorit_rule,
    demo_auto = (demo_auto + 1) / 2
  )

# Lookup table assigning a calendar year to each wave number.
year <- tribble(
  ~wave, ~year,
  1L, 2007,
  2L, 2011,
  3L, 2013,
  4L, 2016
)

# Attach the year (natural join on the shared columns) and keep only the
# variables written to the output file.
ab <- ab %>%
  left_join(year) %>%
  select(
    country, wave, year, wt,
    econ_sec, pers_sec,
    demo_pref, trust_governm,
    islamism,
    sex, age_gr, edu, income,
    trust_courts, trust_parlm,
    trust_police, trust_army,
    q5161, q5162, q5163,
    demo_auto, democr_approval
  )

write_csv(ab, "ab-security-and-values.csv")

